********************************************************************************
** Cederman, Galano, Girardin and Schvitz. War Did Make States.
** Article prepared for International Organization
** June 20, 2022
**
** Stata do-file: data_prep_cent1.do
** First data preparation file for state-level data (Centennia)
** Required file paths set in runall.do
********************************************************************************

// Load the raw state-level (Centennia) table from the input directory.
cd $ROOT
cd $INPUTDIR
insheet using $INPUTFILE, clear

// Discard rows without a panel or time identifier, then declare the panel.
drop if id == . | year == .
xtset id year
sort id year

// Merge in centennia_coast (read from the intermediate directory) on id-year.
cd $ROOT
cd $INTERMEDIATEDIR
merge 1:1 id year using centennia_coast
drop _merge

// Re-apply the identifier filter and the panel declaration on the merged data.
drop if id == . | year == .
xtset id year
sort id year


// War-incidence dummy: 0 when incidencebrecke is zero, 1 when it is positive;
// left missing when incidencebrecke is negative or missing (.).
generate inc1 = cond(incidencebrecke > 0, 1, 0) if incidencebrecke >= 0 & incidencebrecke != .

// Totals over each state's entire lifespan: sum of inc1, number of
// observations, and the ratio of the two.
bysort id: egen sumwaryears = total(inc1)
bysort id: generate sumyears = _N
generate relwaryears = sumwaryears / sumyears

// Running sum of inc1 within each state (ordered by year), plus its
// five-year lag.
bysort id (year): generate waryears = sum(inc1)
generate lwaryears = L5.waryears

// Birth and death indicators
xtset id year

// Last and first observed year of each state.
bysort id: egen maxyear = max(year)
bysort id: egen minyear = min(year)

// birth: the state has no observation five years earlier.
// firstbirth: a birth that falls in the state's first observed year.
generate birth = (id != . & L5.id == .)
generate firstbirth = (birth == 1 & year == minyear)

// death: the state has no observation five years later; only flagged for
// years before 1790.
// finaldeath: the state's last observed year, again only before 1790.
generate death = (id != . & F5.id == . & year < 1790)
generate finaldeath = (year == maxyear & year < 1790)

// Death history: running count of death flags within each state, and a
// dummy that is 1 once that count is positive.
bysort id (year): generate deaths = sum(death)
generate deaths1 = (deaths > 0 & deaths != .)

// Duration dependence: btscs (user-written command) is asked for the
// lifeyears counter and three spline terms, which it names _spline1-_spline3;
// the leading underscore is stripped here.
btscs death year id, gen(lifeyears) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' spline`k'
}

// Recode missing growth / shrink / gain / loss areas to zero. None of these
// variables is read before this point, so all six are handled together.
foreach v of varlist growthwarbreckearea growthpeacebreckearea ///
        shrinkwarbreckearea shrinkpeacebreckearea gainarea lossarea {
    replace `v' = 0 if `v' == .
}

// Five-year lags: logged area and the war-incidence dummy.
generate llarea = ln(L5.area)
generate linc1 = L5.inc1

// Age counter that advances by 5 with every observation of a state, and the
// log of its five-year lag (shifted by 5).
bysort id (year): generate age = 5 * _n
generate llage = ln(L5.age + 5)

// log(1 + x) of lagged territorial growth and shrinkage, split by war/peace.
generate llwargrowth = ln(L5.growthwarbreckearea + 1)
generate llpeacegrowth = ln(L5.growthpeacebreckearea + 1)
generate llwarshrink = ln(L5.shrinkwarbreckearea + 1)
generate llpeaceshrink = ln(L5.shrinkpeacebreckearea + 1)

// Net changes: growth minus shrinkage, and gained minus lost area.
generate netwargrowth = growthwarbreckearea - shrinkwarbreckearea
generate netpeacegrowth = growthpeacebreckearea - shrinkpeacebreckearea
generate netgain = gainarea - lossarea

// Split the net territorial change into a non-negative gain and a
// non-negative loss.
gen gain = netgain
replace gain = 0 if netgain<0 & netgain!=.

gen loss = 0
replace loss = abs(netgain) if netgain<0 & netgain!=.

// "_nd" variant: same as loss, but set to missing in periods flagged as a death.
gen loss_nd = loss
replace loss_nd = . if death==1

// Logged versions (log(1 + x)).
gen lngain = log(gain + 1)
gen lnloss = log(loss + 1)
gen lnloss_nd = log(loss_nd + 1)

// Loss relative to the area five years earlier; missing wherever the lagged
// area is unavailable.
gen rloss = loss/l5.area
gen lnrloss = log(rloss+1)

gen rloss_nd = loss_nd/l5.area
gen lnrloss_nd = log(rloss_nd + 1)

// Dummy versions of gain and loss.
gen gain1 = 0
replace gain1 = 1 if netgain > 0 & netgain!=.

gen loss1 = 0
replace loss1 = 1 if loss > 0 & loss!=.

// Dummy version of loss_nd. Stata treats missing values as larger than any
// number, so "loss_nd>0" alone is true for missing loss_nd and would code
// every death period as 1. The explicit missing check keeps those periods
// missing, in line with lnloss_nd and rloss_nd.
gen loss_nd1 = loss_nd
replace loss_nd1 = 1 if loss_nd>0 & !missing(loss_nd)



// Territorial spells: an onset is flagged whenever the area five years ago
// differs from the area ten years ago; the running sum of onsets numbers the
// spells within each state.
xtset id year
generate spellonset = (L5.area != L10.area)
bysort id (year): generate spellnum = sum(spellonset)

// One identifier per state-spell combination.
generate idstr = string(id)+"."+string(spellnum)
egen spell = group(idstr)

// Time elapsed within the spell, starting at 5 in the spell's first year.
bysort spell: egen minspellyear = min(year)
generate spellyear = year - minspellyear + 5

// Duration counter and three splines for territorial gains.
btscs gain1 year id, gen(nogainyear) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' ngspline`k'
}

// Duration counter and three splines for territorial losses.
btscs loss1 year id, gen(nolossyear) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' nlspline`k'
}

// event is 1 when loss1 is 1 and otherwise takes the value of gain1.
generate event = cond(loss1 == 1, 1, gain1)

// Duration counter and three splines for any event.
btscs event year id, gen(noeventyear) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' evspline`k'
}

// Write the prepared data to the intermediate directory.
cd $ROOT
cd $INTERMEDIATEDIR
save "statedata_intermediate.dta", replace
